########################################################################
# Produce figures and tables in 
# "Public attitudes toward genetic risk scoring in medicine and beyond"
# 02/2021
# Simone Zhang, Rebecca Johnson, John Novembre, Edward Freeland, 
# Dalton Conley
########################################################################

library(dplyr)
library(forcats)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(RColorBrewer)
library(rtf)
library(stringr)
library(tidyr)

options(scipen=999)

# Set ggplot theme
theme_set(theme_minimal())
theme_update(rect = element_rect(fill = "white", 
                                 colour = "black"))
# Load survey data
sample <- readRDS("sample_responses.RDS")
wtp <- readRDS("wtp.RDS")

# Path for storing outputs
out_path <- "output/"

############################
# FIGURE 1: Abstract Belief
############################
# Summarize responses
data_acceptable <- sample %>%
  # Filter out 1 missing response
  filter(!is.na(belief_acceptable)) %>%
  group_by(belief_acceptable) %>%
  count() %>%
  ungroup() %>%
  mutate(likert_level = row_number(),
         perc_accept = (n / sum(n)) * 100,
         xval = "Answers")

# Set color levels
numlevels <- nrow(data_acceptable)
pal <- brewer.pal((numlevels),"RdBu")
pal[ceiling(numlevels / 2)] <- "#FFF5F5"
legend.pal.df <- data.frame(col = pal,
                            likert_level = 1:numlevels)
legend.pal <- rev(pal)

# Produce figure
data_acceptable %>%
  # Join color reference
  left_join(legend.pal.df, by = "likert_level") %>%
  mutate(belief_acceptable = fct_rev(belief_acceptable)) %>%
  ggplot(aes(x = xval, y = perc_accept, fill = belief_acceptable, order = likert_level)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual("Percent", values = legend.pal) + 
  labs(title = "\"It's normal and acceptable to judge individuals on the basis of their genetic predispositions.\"",
       y = "Percent") + 
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(plot.title = element_text(hjust = 0.01),
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 8),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size = 8)) + 
  scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) + 
  coord_flip() 

ggsave("fig1_acceptable_to_judge.eps", plot = last_plot(), device = "eps", path = out_path,
               width = 8, height = 4, bg = "transparent")

############################
# FIGURE 2: Permissibility
############################
# Create tidy dataset of permissibility ratings 
perm_outcomes <- sample %>%
  select(resp_pin, starts_with("outcome_permit")) %>%
  gather(key = "setting", value = "permit_pgs", -resp_pin) %>%
  mutate(setting = str_remove(setting, "outcome_permit_")) 

perm_trait <- sample %>%
  # Add info on trait predicted in school and insurance vignettes
  mutate(rand_schooltrait = "IQ",
         rand_insurancetrait = "IQ") %>%
  select(resp_pin, contains("trait")) %>%
  gather(key = "setting", value = "rand_trait", -resp_pin) %>%
  mutate(setting = str_remove_all(setting, "^rand_|trait$")) 

perm_tidy <- perm_trait %>%
  left_join(perm_outcomes, by = c("resp_pin", "setting"))

# Subset to observations for Panel A, permissibility by setting
# using only observations where trait was IQ for comparability. 
perm_instit <- perm_tidy %>%
  # Select only observations where trait was IQ for greater comparability
  filter(rand_trait == "IQ") %>%
  # Summarize responses by setting
  group_by(setting) %>%
  summarize(prop_permit_pgs = mean(permit_pgs, na.rm = T),
            count_permit_pgs = sum(permit_pgs, na.rm = T),
            n_permit_pgs = sum(!is.na(permit_pgs))) %>%
  mutate(se = sqrt((prop_permit_pgs * (1 - prop_permit_pgs)) / n_permit_pgs),
         lower = prop_permit_pgs - (qnorm(1 - (0.05 / 2)) * se),
         upper = prop_permit_pgs + (qnorm(1 - (0.05 / 2)) * se)) 

# Generate plot
perm_instit_plot <- perm_instit %>%
  ggplot(aes(x = reorder(setting,  prop_permit_pgs), 
             y = prop_permit_pgs * 100,
             ymin = lower * 100, ymax = upper * 100,
             label = round(prop_permit_pgs * 100, digits = 0))) + 
  geom_bar(stat = "identity", fill = "lightsteelblue3") +
  geom_errorbar(width = .2) +
  labs(title = "Permissibility by setting",
       x ="Setting", y = "Percent permitting polygenic scores", tag = "A.") +
  geom_label(position = position_dodge(width = 1),
             size = 2,
             label.padding = unit(0.11, "lines"),
             show.legend = F) + 
  theme(plot.title = element_text(hjust = 0.5),
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 8)) + 
  scale_y_continuous(limits = c(0, 100)) +
  # Add sample size
  geom_text(aes(y = 0, label = paste0("n = ", round(n_permit_pgs, digits = 0))), 
            vjust = 0, nudge_y = 1,
            size = 2.25,   
            color = "white")

perm_instit_plot

# Summarize permissibility by trait (Panel B) 
# Calculate means by trait pooling across in which it was randomized
perm_trait <- perm_tidy %>%
  # Filter to embryo and sperm/egg donor vignettes
  filter(setting %in% c("donor", "embryo")) %>%
  group_by(rand_trait) %>%
  summarize(prop_permit_pgs = mean(permit_pgs, na.rm = T),
            count_permit_pgs = sum(permit_pgs, na.rm = T),
            n_permit_pgs = sum(!is.na(permit_pgs))) %>%
  mutate(se = sqrt((prop_permit_pgs * (1 - prop_permit_pgs)) / n_permit_pgs),
         lower = prop_permit_pgs - (qnorm(1 - (0.05 / 2)) * se),
         upper = prop_permit_pgs + (qnorm(1 - (0.05 / 2)) * se))

perm_trait_plot <- perm_trait %>%
  ggplot(aes(x = reorder(rand_trait, prop_permit_pgs), y = prop_permit_pgs * 100,
             ymin = lower * 100, ymax = upper * 100,
             label = round(prop_permit_pgs * 100, digits = 0))) + 
  geom_bar(stat = "identity", fill = "lightsteelblue3") +
  geom_errorbar(width = .2) +
  labs(title = "Permissibility by trait",
       x ="Trait", y = "Percent permitting polygenic scores", tag = "B.") + 
  geom_label(position = position_dodge(width = 1),
             size = 2,
             label.padding = unit(0.11, "lines"),
             show.legend = F) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text = element_text(size = 8),
        axis.title = element_text(size = 8)) + 
  scale_y_continuous(limits=c(0,90)) +
  # Add sample size
  geom_text(aes(y = 0, label = paste0("n = ", round(n_permit_pgs, digits = 0))), 
            vjust = 0, nudge_y = 1,
            size = 2.25, 
            color = "white")

perm_trait_plot

# Combine Panels A and B into Figure 2
ggarrange(perm_instit_plot, perm_trait_plot, nrow = 1) + 
  theme_transparent(base_size = 12, base_family = "")

ggsave("fig2_permissibility.eps", plot = last_plot(), device = "eps", path = out_path,
       width = 8, height = 3.5, bg = "transparent")

# t-tests of diferences in proportions
run_prop_test_permit <- function(dat, variation, val_a, val_b) { 
  prop.test(x = dat[dat[[variation]] %in% c(val_a, val_b),]$count_permit_pgs,
            n = dat[dat[[variation]] %in% c(val_a, val_b),]$n_permit_pgs)
}

# By setting
run_prop_test_permit(perm_instit, "setting", "insurance", "school")
run_prop_test_permit(perm_instit, "setting", "school", "embryo")
run_prop_test_permit(perm_instit, "setting", "embryo", "donor")

# By trait
run_prop_test_permit(perm_trait, "rand_trait", "skin tone", "height")
run_prop_test_permit(perm_trait, "rand_trait", "height", "IQ")
run_prop_test_permit(perm_trait, "rand_trait", "skin tone", "IQ")
run_prop_test_permit(perm_trait, "rand_trait", "IQ", "diabetes")
run_prop_test_permit(perm_trait, "rand_trait", "diabetes", "schizophrenia")
run_prop_test_permit(perm_trait, "rand_trait", "IQ", "schizophrenia")

##########################################
# FIGURE 3: Willingness to provide sample
##########################################
# Summarize willingness to provide
# Pooling across randomizations
wtp_dat <- wtp %>%
  # Create new labels
  mutate(instit_label = case_when(institution_name_short == "DMV" ~ "Department of Motor Vehicles",
                                  institution_name_short == "Relative finder" ~ "Relative finder service",
                                  TRUE ~ as.character(institution_name_short))) %>%
  group_by(instit_label) %>%
  summarize(prop_provide = mean(provide, na.rm = T),
            count_provide = sum(provide, na.rm = T),
            n_provide = sum(!is.na(provide))) %>%
  mutate(se = sqrt((prop_provide * (1 - prop_provide)) / n_provide),
         lower = prop_provide - (qnorm(1 - (0.05 / 2)) * se),
         upper = prop_provide + (qnorm(1 - (0.05 / 2)) * se)) %>%
  arrange(desc(prop_provide))

# Generate plot
wtp_dat %>%
  ggplot(aes(x = reorder(instit_label, prop_provide), y = (100 * prop_provide),
             ymin = lower * 100, ymax = upper * 100,
             label = round(prop_provide * 100, digits = 0))) +
  geom_bar(stat = "identity", fill = "grey42") +
  geom_errorbar(width = .2) +
  coord_flip() +
  labs(title = "Willingness to provide genetic information",
       x ="Institution or service", y = "Percent willing") +
  geom_label(position = position_dodge(width = 1),
             size = 2,
             label.padding = unit(0.15, "lines"),
             show.legend = F) + 
  theme(plot.title = element_text(hjust = 0.5), 
        axis.title = element_text(size = 8),
        axis.text = element_text(size = 8)) + 
  geom_text(aes(y = 0, label = paste0("n = ", n_provide)),
            hjust = -0.1,
            size = 2, 
            color = "white")

ggsave("fig3_wtp.eps", plot = last_plot(), device = "eps", path = out_path,
       width = 5.5, height = 4.5, bg = "transparent")

# Tests of difference in proportions
run_prop_test_wtp <- function(dat, instit_a, instit_b) { 
  prop.test(x = dat[dat$instit_label %in% c(instit_a, instit_b),]$count_provide,
            n = dat[dat$instit_label %in% c(instit_a, instit_b),]$n_provide)
}

run_prop_test_wtp(wtp_dat, "Health care provider", "Relative finder service")
run_prop_test_wtp(wtp_dat, "Department of Motor Vehicles", "Life insurance provider")
run_prop_test_wtp(wtp_dat, "Life insurance provider", "Online music/video service")
run_prop_test_wtp(wtp_dat, "Online music/video service", "Lender")
run_prop_test_wtp(wtp_dat, "Online music/video service", "Social network")
run_prop_test_wtp(wtp_dat, "Lender", "Social network")

###############################################
# TABLE 1: Responses by respondent demographics 
###############################################
# Collapse select variable levels 
sample_coll <- sample %>%
  mutate(demo_racethnic = fct_collapse(demo_racethnic,
                                       `White, non-Hispanic` = "White, non-Hispanic",
                                       `Black, non-Hispanic` = "Black, non-Hispanic",
                                       `Hispanic` = "Hispanic",
                                       `Other` = c("Other, non-Hispanic", "2+, non-Hispanic","Asian, non-Hispanic")),
         belief_religion = fct_collapse(belief_religion,
                                        `Catholic` = "Catholic",
                                        `Protestant` = "Protestant", 
                                        `Other` = c("Jewish", "Other"),
                                        `None` = "None"),
         belief_partyid = fct_collapse(belief_partyid,
                                       `Republican` = "Republican",
                                       `Democrat` = "Democrat",
                                       `Independent` = "Independent",
                                       `Other/No Preference` = c("Other", "No preference")),
         demo_educ = fct_collapse(demo_educ,
                                  `HS or Less` = c("HS or less, no diploma", "HS diploma or GED"),
                                  `Some College` = "Some college, no degree", 
                                  `Associates`=  "Associates degree",
                                  `Bachelors` = "Bachelors degree",
                                  `Advanced` = "Advanced degree"),
         # Create dichotomous version of agreement variable, where responses indicating "somewhat agree" 
         # or more are collapsed into "agree." 
         belief_acceptable_dichot = ifelse(as.numeric(belief_acceptable) >= 5, TRUE, FALSE)) 

# Set demographic variables 
demo_vars <- c("demo_gender", "demo_racethnic", "demo_age", "demo_educ", 
               "belief_religion", "belief_partyid")

# Calculate proportion believing it is acceptable to judge based on
# genetic information given name of a demographic variable
get_prop_acceptable <- function(demo_var) {
  # Filter out where demo var is missing
  sample_coll %>%
    # Remove if didn't answer acceptability question
    filter(!is.na(belief_acceptable)) %>%
    # Remove if missing demographic info
    filter(!is.na(get(demo_var))) %>%
    group_by_at(vars(matches(demo_var))) %>%
    summarize(n_accept = n(),
              prop_acceptable = mean(belief_acceptable_dichot)) %>%
    mutate(comparison = demo_var) %>%
    rename(variable = demo_var) %>%
    mutate(variable = as.character(variable)) 
}

acceptable_props <- bind_rows(lapply(demo_vars, FUN = get_prop_acceptable))

# Summarize # of applications rated as permissible
permit_count_joined <- perm_tidy %>%
  group_by(resp_pin) %>%
  summarize(n_permit_answered = n(),
            count_permit = sum(permit_pgs)) %>%
  # Join respondent demographics
  left_join(sample_coll, by = "resp_pin") 

# Get mean count rated a permissible given name of a demographic variable
get_mean_permit <- function(demo_var) {
  permit_count_joined %>%
    # Remove if missing demographic info
    filter(!is.na(get(demo_var))) %>%
    group_by_at(vars(matches(demo_var))) %>%
    summarize(n_permit = n(),
              count_permit = mean(count_permit)) %>%
    mutate(comparison = demo_var) %>%
    rename(variable = demo_var) %>%
    mutate(variable = as.character(variable)) 
}

permit_means <- bind_rows(lapply(demo_vars, FUN = get_mean_permit))

# Summarize proportion of institutions/services respondent is willing to provide 
# genetic sample to 
wtp_joined <- wtp %>%
  group_by(resp_pin) %>%
  # Summarize proportion of institutions willing
  # Any missingness results in an NA
  summarize(prop_provide = mean(provide)) %>%
  left_join(sample_coll, by = "resp_pin") 

# Get proportion of institutions respondent is willing to provide to 
# given name of a demographic variable
get_prop_wtp <- function(demo_var) {
  # Filter out where demo var is missing
  wtp_joined %>%
    # remove 11 obs that didn't answer all willingness to provide questions
    filter(!is.na(prop_provide)) %>%
    filter(!is.na(get(demo_var))) %>%
    group_by_at(vars(matches(demo_var))) %>%
    summarize(n_wtp = n(),
              prop_provide = mean(prop_provide)) %>%
    mutate(comparison = demo_var) %>%
    rename(variable = demo_var) %>%
    mutate(variable = as.character(variable)) 
}

wtp_props <- bind_rows(lapply(demo_vars, FUN = get_prop_wtp))

# Get N per subgroup given name of a demographic variable
get_n_subgroup <- function(demo_var) {
  sample_coll %>%
    # Filter out where demo var is missing
    filter(!is.na(get(demo_var))) %>%
    group_by_at(vars(matches(demo_var))) %>%
    summarize(n_subgroup = n()) %>%
    mutate(comparison = demo_var) %>%
    rename(variable = demo_var) %>%
    mutate(variable = as.character(variable)) 
}
n_subgroups <- bind_rows(lapply(demo_vars, FUN = get_n_subgroup))

# Join everything together
means_joined <- acceptable_props %>%
  left_join(permit_means, by = c("variable", "comparison")) %>%
  left_join(wtp_props, by = c("variable", "comparison")) %>%
  left_join(n_subgroups, by = c("variable", "comparison"))

# Run Kruskal-Wallis test given demographic variable,
# name of outcome variable, and dataset
run_kw <- function(demo_var, outcome_var, dataset) {
  kw.out <- kruskal.test(as.formula(paste0(outcome_var, "~", demo_var)), data = dataset)
  return(c("outcome" = outcome_var,
           "demo" = demo_var,
           "kwchi_val" = unname(kw.out$statistic), 
           "p_val" = kw.out$p.value))
}

# Run tests for all 3 sets of outcomes and all demographic variables
kw_out <- bind_rows(
  bind_rows(!!!lapply(demo_vars, run_kw, "belief_acceptable_dichot", sample_coll)),
  bind_rows(!!!lapply(demo_vars, run_kw, "count_permit", permit_count_joined )),
  bind_rows(!!!lapply(demo_vars, run_kw, "prop_provide", wtp_joined))
)
kw_out

# Generate table of p-values from K-W tests
kw_pval <- kw_out %>% 
  dplyr::select(-kwchi_val) %>% 
  spread(key = "outcome", value = "p_val")

kw_pval

# Output all results to RTF
rtf_output <- paste0(out_path, "demo_tables.doc")
rtf <- RTF(rtf_output) 

addHeader(rtf,title = "Table of Means")
addTable(rtf,
         dplyr::select(means_joined, variable, n_subgroup, prop_acceptable, count_permit, prop_provide) %>%
           mutate_at(vars(prop_acceptable, count_permit, prop_provide), list(~formatC(signif(., 2), digits = 2, format = "fg", flag = "#"))),
         font.size=10,row.names=FALSE, NA.string="-" )

addNewLine(rtf)

addHeader(rtf,title = "Kruskal-Wallis p-values")  
addTable(rtf,
         kw_pval %>% 
           arrange(desc(demo)) %>%
           mutate(belief_acceptable_dichot = as.numeric(belief_acceptable_dichot),
                  count_permit = as.numeric(count_permit),
                  prop_provide = as.numeric(prop_provide)) %>%
           mutate_if(is.numeric, list(~formatC(signif(., 2), digits = 2, format = "fg", flag = "#"))),
         font.size=10,row.names=FALSE, NA.string="-" )

done(rtf)

##########################
# Online Appendix Tables 
#########################

# Pairwise Wilcoxon tests of subgroup differences
# for demographic variables where a significant difference was detected
# in the omnibus test

# Filter to comparisons where there was a signficant difference in
# omnibus test
kw_sig <- kw_out %>%
  filter(p_val < 0.05) %>%
  mutate(dataset = ifelse(outcome == "count_permit", "permit_count_joined", 
                          "wtp_joined"))

# Run pairwise tests given name of a demographic variable,
# name of the outcome variable, and name of the dataset 
run_w_test <- function(demo_var, outcome_var, dataset) {
  # Load dataset for analysis
  data_loaded <- get(dataset, envir = .GlobalEnv)
  # Conduct test with Benjamin-Hochberg adjustments for multiple comparisons
  w.out <- pairwise.wilcox.test(data_loaded[[outcome_var]],
                                data_loaded[[demo_var]], p.adjust.method = "BH")
  return(w.out$p.value)
  
}

# Run all pairwise tests
wilcox_tests <- mapply(FUN = run_w_test, kw_sig$demo, kw_sig$outcome, kw_sig$dataset)
names(wilcox_tests) <- paste("Outcome: ", kw_sig$outcome,
                             "; Variable: ", str_remove(names(wilcox_tests), "^demo_|^belief_"))
wilcox_tests

# Output to RTF
rtf <- RTF(paste0(out_path, "online_supplement_tables.doc")) 
addHeader(rtf,title = "Pairwise Wilcoxon tests")  

for(i in 1:length(wilcox_tests)) {
  addParagraph(rtf, names(wilcox_tests)[[i]])
  addNewLine(rtf)
  addTable(rtf,
           round(wilcox_tests[[i]], 4),
           font.size=10,row.names=TRUE, NA.string="-" )
  addNewLine(rtf)
}

done(rtf)

